---
title: "Untitled"
output: html_document
---

############################################
Title: 4_01_Table_1.Rmd
Purpose: This file creates Table 1 
Last updated: 30-4-2024

Input:
    - Loan panel data. Latest version: "final_panel_loans.csv"
    - Bond panel data. Latest version: "final_panel_bonds.csv"
    - Compustat data. Latest version: "final_panel_compustat.csv"
    
Output:
    - Table 1
############################################


0. Setup *Set your file path here*
----------------------------------
```{r, results="hide", include=FALSE}
rm(list=ls())
require(quantmod); require(dynlm); require(AER); require(vars); require(forecast); require(stargazer); require(strucchange); require(xts); require(lubridate); require(fGarch); require(zoo); require(timeSeries); library(sandwich); require(car);library(fBasics); library(ggplot2); require(dplyr); require(tidyverse); library(readxl); library(sandwich); library(QuantPsyc); library(corrplot);library(plm); library(ggplot2); library(modelr) 
library(ggpubr)

options(scipen=999)

#Assign dplyr verbs  
select <- dplyr::select
rename <- dplyr::rename
mutate <- dplyr::mutate
filter <- dplyr::filter
arrange <- dplyr::arrange
distinct <- dplyr::distinct
group_by <- dplyr::group_by
summarise <- dplyr::summarise
lag <- dplyr::lag

#Define directories
global <- ".../2024_RFS_Replication_TOSUBMIT"

```



1. Data  
---------------------------------

1.1 Loan market composition data 
```{r, results="hide", include=FALSE}

#Load
LSTA_monthly_ytm <- read_csv(paste0(global, "/Data/final_panel_loans.csv", ""), 
    col_types = cols(...1 = col_skip(), date_m = col_date(format = "%Y-%m-%d")))


```

1.2 Bond market composition data
```{r}

TRACE_monthly_ytm_1999_2020 <- read_csv(paste0(global, "/Data/final_panel_bonds.csv", ""), 
    col_types = cols(...1 = col_skip(), date_m = col_date(format = "%Y-%m-%d")))

```


2. Panel A 
--------------------------------

2.1 Loan Panel A
```{r, results="hide", include=FALSE}

check <- LSTA_monthly_ytm %>% 
  group_by(BorrowerName) %>% 
  summarise(n=n())
#3,773 unique issuer ids

check <- LSTA_monthly_ytm %>% 
  filter(!is.na(gvkey_comb)) %>% 
  group_by(BorrowerName) %>% 
  summarise(n=n())
#2,030 public issuers

check <- LSTA_monthly_ytm%>% 
  filter(is.na(gvkey_comb)) %>% 
  group_by(BorrowerName) %>% 
  summarise(n=n())
#1,763 unique issuer_ids without gvkeys

check <- LSTA_monthly_ytm %>% 
  filter(!is.na(gvkey_comb)) %>% 
  group_by(gvkey_comb) %>% 
  summarise(n=n())
#1,776 unique gvkeys amongst pubblic issuers

```

2.2 Bond Panel A
```{r, results="hide", include=FALSE}

check <- TRACE_monthly_ytm_1999_2020 %>% 
  group_by(ISSUER_ID) %>% 
  summarise(n=n())
#2,917 unique issuer ids

check <- TRACE_monthly_ytm_1999_2020 %>% 
  filter(!is.na(gvkey_ccl)) %>% 
  group_by(ISSUER_ID) %>% 
  summarise(n=n())
#2,205 public issuers

check <- TRACE_monthly_ytm_1999_2020 %>% 
  filter(is.na(gvkey_ccl)) %>% 
  group_by(ISSUER_ID) %>% 
  summarise(n=n())
#904 unique issuer_ids without gvkeys

check <- TRACE_monthly_ytm_1999_2020 %>% 
  filter(!is.na(gvkey_ccl)) %>% 
  group_by(gvkey_ccl) %>% 
  summarise(n=n())
#1,610 unique gvkeys amongst pubblic issuers

```



3. Combined Panels (Panel B + C) 
-----------------------------------

3.1 Combined list of gvkeys
```{r, results="hide", include=FALSE}

#Loans

#Keep unique loan issuers 3,773
issuer_list <-  LSTA_monthly_ytm %>% 
  ungroup() %>% 
  select(gvkey_comb, BorrowerName) %>% 
  distinct(BorrowerName, .keep_all = TRUE) %>% 
  filter(!is.na(BorrowerName))


#Keep unique loan issuers with GVKEY 1,776
loan_borrower_list <- LSTA_monthly_ytm %>% 
    ungroup() %>% 
    select(gvkey_comb, BorrowerName, age) %>% 
    distinct(gvkey_comb, .keep_all = TRUE) %>% 
    filter(!is.na(gvkey_comb))

#Keep unique loan issuer gvkeys
loan_borrower_list <-  loan_borrower_list %>%
  ungroup() %>% 
  select(gvkey_comb) %>% 
  distinct(gvkey_comb) %>% 
  mutate(loan = 1)



#Bonds

#Keep unique bond issuers 1,610
bond_borrower_list <-  TRACE_monthly_ytm_1999_2020 %>% 
  select(ISSUER_ID, gvkey_ccl, PROSPECTUS_ISSUER_NAME) %>% 
  distinct(gvkey_ccl, .keep_all = TRUE) %>% 
  filter(!is.na(gvkey_ccl))


bond_borrower_list <-  bond_borrower_list %>%
  ungroup() %>% 
  select(gvkey_ccl) %>% 
  distinct(gvkey_ccl) %>% 
  mutate(bond = 1)


#Combine
all_gvkey <- full_join(bond_borrower_list, loan_borrower_list, by=c("gvkey_ccl"="gvkey_comb"))



check <- all_gvkey %>% 
  filter(bond == 1 )
#1610

check <- all_gvkey %>% 
  filter(loan == 1)
#1778

check <- all_gvkey %>% 
  filter(loan == 1 & bond ==1) 
#569



```


Bond Side:
------------------------------------

Table 1 Bond - Panel B
```{r, results="hide", include=FALSE}

#List of gvkeys
all_gvkey <- read_csv(paste0(global, "/Data/final_panel_compustat.csv", ""))

all_gvkey <- all_gvkey %>% 
  select(gvkey, bond, loan, age, avg_at) %>% 
  distinct(gvkey, .keep_all = T ) %>% 
  filter(bond == 1 | loan == 1)



all_gvkey %>% 
  filter(bond == 1) %>% 
  mutate(mean_age = mean(age, na.rm = T))


#Age: 

#Under 5years
a <-  all_gvkey %>% 
  filter(bond == 1) %>% 
  filter(age <= 5) %>% 
  distinct(gvkey, .keep_all = T)  
a <- length(a$gvkey)

#5-10years
bond_borrowers_under_5_10 <-  all_gvkey  %>% 
  filter(bond == 1 & age > 5 & age <= 10) %>% 
  distinct(gvkey, .keep_all = T)  
b <- length(bond_borrowers_under_5_10$gvkey)

#10-20 years
bond_borrowers_under_10_20 <-  all_gvkey  %>% 
  filter(bond == 1 & age > 10 & age <= 20) %>% 
  distinct(gvkey, .keep_all = T)  
c <- length(bond_borrowers_under_10_20$gvkey)

# >20 years
bond_borrowers_under_20 <-  all_gvkey %>% 
  filter(bond == 1 & age > 20) %>% 
  distinct(gvkey, .keep_all = T)  
d <- length(bond_borrowers_under_20$gvkey)

#Calculate distribution
age_df <- as.data.frame(rbind(a,b,c,d))
colnames(age_df)[1]="age"
age_df <- age_df%>% 
  mutate(total_age = sum(age)) %>% 
  mutate(proportion = age/total_age) %>% 
  mutate(check=sum(proportion))



#Age + Overlap: 

#Under 5years
a <-  all_gvkey %>% 
  filter(bond == 1) %>% 
  filter(loan == 1 ) %>% 
  filter(age <= 5) %>% 
  distinct(gvkey, .keep_all = T)  
a <- length(a$gvkey)

#5-10years
bond_borrowers_under_5_10 <-  all_gvkey  %>% 
  filter(bond == 1 & loan == 1 & age > 5 & age <= 10) %>% 
  distinct(gvkey, .keep_all = T)  
b <- length(bond_borrowers_under_5_10$gvkey)

#10-20 years
bond_borrowers_under_10_20 <-  all_gvkey  %>% 
  filter(bond == 1 & loan == 1 & age > 10 & age <= 20) %>% 
  distinct(gvkey, .keep_all = T)  
c <- length(bond_borrowers_under_10_20$gvkey)

# >20 years
bond_borrowers_under_20 <-  all_gvkey %>% 
  filter(bond == 1 & loan == 1 & age > 20) %>% 
  distinct(gvkey, .keep_all = T)  
d <- length(bond_borrowers_under_20$gvkey)

#Calculate distribution
age_df <- as.data.frame(rbind(a,b,c,d))
colnames(age_df)[1]="age"
age_df <- age_df%>% 
  mutate(total_age = sum(age)) %>% 
  mutate(proportion = age/total_age) %>% 
  mutate(check=sum(proportion))



```

Table 1 Bond - Panel C
```{r, results="hide", include=FALSE}

#Size:

#Under 2bill
bond_borrowers_under_2 <-  all_gvkey %>% 
  filter(bond == 1 & avg_at <= 2000) %>% 
  distinct(gvkey, .keep_all = T)  
a <- length(bond_borrowers_under_2$gvkey)

#5-10bill
bond_borrowers_under_2_6 <-  all_gvkey %>% 
  filter(bond == 1 & avg_at> 2000 & avg_at <= 6000  ) %>% 
  distinct(gvkey, .keep_all = T)  
b <- length(bond_borrowers_under_2_6$gvkey)

#10-20bill
bond_borrowers_under_6_10 <-  all_gvkey  %>% 
  filter(bond == 1 & avg_at> 6000 & avg_at <= 10000  ) %>% 
  distinct(gvkey, .keep_all = T)  
c <- length(bond_borrowers_under_6_10$gvkey)

# >10bill
bond_borrowers_under_10 <-  all_gvkey  %>% 
  filter(bond == 1 & avg_at > 10000) %>% 
  distinct(gvkey, .keep_all = T)  
d <- length(bond_borrowers_under_10$gvkey)

#Calculate distribution
size_df <- as.data.frame(rbind(a,b,c,d))
colnames(size_df)[1]="size"
size_df <- size_df%>% 
  mutate(total_size = sum(size)) %>% 
  mutate(proportion = size/total_size) %>% 
  mutate(check=sum(proportion))


#Size + Overlap:

#Under 2bill
bond_borrowers_under_2 <-  all_gvkey %>% 
  filter(bond == 1 & loan == 1 & avg_at <= 2000) %>% 
  distinct(gvkey, .keep_all = T)  
a <- length(bond_borrowers_under_2$gvkey)

#5-10bill
bond_borrowers_under_2_6 <-  all_gvkey %>% 
  filter(bond == 1 & loan == 1 & avg_at> 2000 & avg_at <= 6000  ) %>% 
  distinct(gvkey, .keep_all = T)  
b <- length(bond_borrowers_under_2_6$gvkey)

#10-20bill
bond_borrowers_under_6_10 <-  all_gvkey  %>% 
  filter(bond == 1 & loan == 1 & avg_at> 6000 & avg_at <= 10000  ) %>% 
  distinct(gvkey, .keep_all = T)  
c <- length(bond_borrowers_under_6_10$gvkey)

# >10bill
bond_borrowers_under_10 <-  all_gvkey  %>% 
  filter(bond == 1 & loan == 1 & avg_at > 10000) %>% 
  distinct(gvkey, .keep_all = T)  
d <- length(bond_borrowers_under_10$gvkey)

#Calculate distribution
size_df <- as.data.frame(rbind(a,b,c,d))
colnames(size_df)[1]="size"
size_df <- size_df%>% 
  mutate(total_size = sum(size)) %>% 
  mutate(proportion = size/total_size) %>% 
  mutate(check=sum(proportion))


```


Loan Side:
-------------------------------------------

Table 1 Loan - Panel B
```{r, results="hide", include=FALSE}
#Age: 

#Under 5years
bond_borrowers_under_5 <-  all_gvkey %>% 
  filter(loan == 1 & age <= 5) %>% 
  distinct(gvkey, .keep_all = T)  
a <- length(bond_borrowers_under_5$gvkey)

#5-10years
bond_borrowers_under_5_10 <-  all_gvkey  %>% 
  filter(loan == 1 & age > 5 & age <= 10) %>% 
  distinct(gvkey, .keep_all = T)  
b <- length(bond_borrowers_under_5_10$gvkey)

#10-20 years
bond_borrowers_under_10_20 <-  all_gvkey  %>% 
  filter(loan == 1 & age > 10 & age <= 20) %>% 
  distinct(gvkey, .keep_all = T)  
c <- length(bond_borrowers_under_10_20$gvkey)

# >20 years
bond_borrowers_under_20 <-  all_gvkey %>% 
  filter(loan == 1 & age > 20) %>% 
  distinct(gvkey, .keep_all = T)  
d <- length(bond_borrowers_under_20$gvkey)

#Calculate distribution
age_df <- as.data.frame(rbind(a,b,c,d))
colnames(age_df)[1]="age"
age_df <- age_df%>% 
  mutate(total_age = sum(age)) %>% 
  mutate(proportion = age/total_age) %>% 
  mutate(check=sum(proportion))


#Age + overlap: 

#Under 5years
bond_borrowers_under_5 <-  all_gvkey %>% 
  filter(loan == 1 & bond ==1 & age <= 5) %>% 
  distinct(gvkey, .keep_all = T)  
a <- length(bond_borrowers_under_5$gvkey)

#5-10years
bond_borrowers_under_5_10 <-  all_gvkey  %>% 
  filter(loan == 1 & bond ==1 & age > 5 & age <= 10) %>% 
  distinct(gvkey, .keep_all = T)  
b <- length(bond_borrowers_under_5_10$gvkey)

#10-20 years
bond_borrowers_under_10_20 <-  all_gvkey  %>% 
  filter(loan == 1 & bond ==1 & age > 10 & age <= 20) %>% 
  distinct(gvkey, .keep_all = T)  
c <- length(bond_borrowers_under_10_20$gvkey)

# >20 years
bond_borrowers_under_20 <-  all_gvkey %>% 
  filter(loan == 1 & bond ==1 & age > 20) %>% 
  distinct(gvkey, .keep_all = T)  
d <- length(bond_borrowers_under_20$gvkey)

#Calculate distribution
age_df <- as.data.frame(rbind(a,b,c,d))
colnames(age_df)[1]="age"
age_df <- age_df%>% 
  mutate(total_age = sum(age)) %>% 
  mutate(proportion = age/total_age) %>% 
  mutate(check=sum(proportion))

```

Table 1 Loan - Panel C
```{r, results="hide", include=FALSE}

#Size:

#Under 2bill
bond_borrowers_under_2 <-  all_gvkey %>% 
  filter(loan == 1 & avg_at <= 2000) %>% 
  distinct(gvkey, .keep_all = T)  
a <- length(bond_borrowers_under_2$gvkey)

#5-10bill
bond_borrowers_under_2_6 <-  all_gvkey %>% 
  filter(loan == 1 & avg_at> 2000 & avg_at <= 6000  ) %>% 
  distinct(gvkey, .keep_all = T)  
b <- length(bond_borrowers_under_2_6$gvkey)

#10-20bill
bond_borrowers_under_6_10 <-  all_gvkey  %>% 
  filter(loan == 1 & avg_at> 6000 & avg_at <= 10000  ) %>% 
  distinct(gvkey, .keep_all = T)  
c <- length(bond_borrowers_under_6_10$gvkey)

# >10bill
bond_borrowers_under_10 <-  all_gvkey  %>% 
  filter(loan == 1 & avg_at > 10000) %>% 
  distinct(gvkey, .keep_all = T)  
d <- length(bond_borrowers_under_10$gvkey)

#Calculate distribution
size_df <- as.data.frame(rbind(a,b,c,d))
colnames(size_df)[1]="size"
size_df <- size_df%>% 
  mutate(total_size = sum(size)) %>% 
  mutate(proportion = size/total_size) %>% 
  mutate(check=sum(proportion))


#Size + Overlap:

#Under 2bill
bond_borrowers_under_2 <-  all_gvkey %>% 
  filter(loan == 1 & bond ==1 & avg_at <= 2000) %>% 
  distinct(gvkey, .keep_all = T)  
a <- length(bond_borrowers_under_2$gvkey)

#5-10bill
bond_borrowers_under_2_6 <-  all_gvkey %>% 
  filter(loan == 1 & bond ==1  & avg_at> 2000 & avg_at <= 6000  ) %>% 
  distinct(gvkey, .keep_all = T)  
b <- length(bond_borrowers_under_2_6$gvkey)

#10-20bill
bond_borrowers_under_6_10 <-  all_gvkey  %>% 
  filter(loan == 1 & bond ==1 & avg_at> 6000 & avg_at <= 10000  ) %>% 
  distinct(gvkey, .keep_all = T)  
c <- length(bond_borrowers_under_6_10$gvkey)

# >10bill
bond_borrowers_under_10 <-  all_gvkey  %>% 
  filter(loan == 1  & bond ==1 & avg_at > 10000) %>% 
  distinct(gvkey, .keep_all = T)  
d <- length(bond_borrowers_under_10$gvkey)

#Calculate distribution
size_df <- as.data.frame(rbind(a,b,c,d))
colnames(size_df)[1]="size"
size_df <- size_df%>% 
  mutate(total_size = sum(size)) %>% 
  mutate(proportion = size/total_size) %>% 
  mutate(check=sum(proportion))

```



 
 
 
 
 
 
 
 
 
 
 
 
 
